# _*_ coding: utf-8 _*_
"""
@ 时间    ：2024/10/27 18:52
@ 作者    ：旺财
@ 文件    ：02 病例关联规则分析.py
@ 说明    ：   
"""

import pandas as pd
from apyori import apriori

# 1. Load the data
# Read the workbook into a DataFrame and show its first rows as a preview.
df = pd.read_excel('中医辨证.xlsx')
print('原始数据:', df.head(), sep='\n')

# 2. Data preprocessing
# Each cell of the '病人症状' column is split on ',' into one transaction
# (a list of symptom tokens) per row.
symptoms = []
for cell in df['病人症状'].dropna():
    # Missing cells are dropped up front: pandas reads them as float NaN,
    # which has no .split() and would abort the whole run.
    # Tokens are stripped so that 'a, b' and 'a,b' yield the same items, and
    # empty tokens (trailing or doubled commas) are discarded so they never
    # show up as a bogus "symptom" in the mined rules.
    tokens = [token.strip() for token in str(cell).split(',')]
    tokens = [token for token in tokens if token]
    if tokens:
        symptoms.append(tokens)

# Mine association rules from the transactions.
# min_support: minimum support; min_confidence: minimum confidence.
rules = apriori(symptoms, min_support=0.08, min_confidence=0.7)
# apriori's result is consumed once here and kept as a list for later use.
results = list(rules)

# Report every mined rule as "antecedent -> consequent".
print('\n关联症状:')
for record in results:
    for stat in record.ordered_statistics:
        # items_base is the rule's antecedent, items_add its consequent.
        antecedent = ', '.join(stat.items_base)
        if not antecedent:
            # Nothing on the left-hand side to display; skip this entry.
            continue
        consequent = ', '.join(stat.items_add)
        print(f'{antecedent} -> {consequent}')